selenium升為4之後有一些寫法與3不同,詳情可參考連結:
本文中的寫法仍是以selenium 3.14.0版本為主
使用 Selenium 爬取 104 網站前,先試試看 104 搜尋列提供哪些搜尋條件,再歸納出需要的條件如下:
1.地區:台中市,台北市
2.職務類別:軟韌體測試工程師,測試人員
3.關鍵字:QA 測試
將以上條件放在一個列表中,搜尋的代碼如下:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
# from selenium.
import time
import re
from bs4 import BeautifulSoup
# import pandas as pd
# import xlsxwriter
class Crawler:
    """Automate a job search on the 104 job board with Selenium and Chrome.

    NOTE(review): ``driver`` is stored on the instance but ``get_104_jobs``
    never reads it; that method always starts its own Chrome session. The
    caller in this file passes a chromedriver path -- confirm whether it was
    meant to be handed to ``Service``.
    """

    # Flat list of search terms, in positional order: two cities, two
    # job-category name fragments, then two free-text keywords.
    SEARCH_ITEMS = ('台北市', '台中市', '軟韌體', '人員', 'QA', '測試')

    # The city picker and the job-category picker are both confirmed with a
    # button carrying this label.
    _CONFIRM_BUTTON_XPATH = "//button[text()='確定']"

    def __init__(self, driver):
        """Store ``driver`` on the instance."""
        self.driver = driver

    def get_104_jobs(self):
        """Fill in the 104 search form and wait for the result list.

        Starts a new Chrome session, selects the cities and job categories
        from ``SEARCH_ITEMS``, submits the keyword search, waits for the
        result list to be present and then closes the browser.

        Raises:
            selenium.common.exceptions.TimeoutException: if an awaited
                element does not appear within 15 seconds.
        """
        cities = self.SEARCH_ITEMS[:2]
        job_types = self.SEARCH_ITEMS[2:4]
        keyword_qa, keyword_test = self.SEARCH_ITEMS[4:]

        service = Service()
        options = webdriver.ChromeOptions()
        driver = webdriver.Chrome(service=service, options=options)
        try:
            driver.get("https://www.104.com.tw/jobs/main/")
            wait = WebDriverWait(driver, 15)

            self._select_cities(wait, cities)
            # The category picker is filtered with the last keyword.
            self._select_job_types(wait, keyword_test, job_types)
            self._submit_keywords(wait, f'{keyword_test} {keyword_qa}')

            # Presumably gives the result page time to finish rendering.
            time.sleep(3)
        finally:
            # Always release the browser and chromedriver processes, even
            # when one of the waits above times out.
            driver.quit()

    @staticmethod
    def _click_when_present(wait, by, selector):
        """Wait until the element is in the DOM, then click it."""
        wait.until(EC.presence_of_element_located((by, selector))).click()

    def _select_cities(self, wait, cities):
        """Open the region picker, tick every city and confirm."""
        self._click_when_present(wait, By.CSS_SELECTOR, "#icity")
        for city in cities:
            self._click_when_present(
                wait, By.XPATH, f'//span[text()="{city}"]/..//input')
        self._click_when_present(wait, By.XPATH, self._CONFIRM_BUTTON_XPATH)

    def _select_job_types(self, wait, query, job_types):
        """Filter the category picker by ``query``, pick the types, confirm."""
        self._click_when_present(wait, By.CSS_SELECTOR, "#ijob")
        self._click_when_present(
            wait, By.CSS_SELECTOR, '.category-picker-o-search')
        search_bar = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '.search-bar')))
        search_bar.send_keys(query)
        # Wait for the result area before looking for individual entries.
        wait.until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, '.result-area-title')))
        for job_type in job_types:
            self._click_when_present(
                wait,
                By.XPATH,
                f'//span[@class="item-desc" and contains(text(),"{job_type}")]',
            )
        self._click_when_present(wait, By.XPATH, self._CONFIRM_BUTTON_XPATH)

    def _submit_keywords(self, wait, keywords):
        """Type ``keywords`` into the search box, submit, await the results."""
        keyword_box = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "#ikeyword")))
        keyword_box.send_keys(keywords)
        self._click_when_present(wait, By.CSS_SELECTOR, ".js-formCheck")
        # The search navigates to a new page; wait for the job list there.
        wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '.job-list-body')))
if __name__ == "__main__":
    # Script entry point: build a crawler with the local chromedriver path
    # and run the 104 search once.
    crawler = Crawler(driver='./chromedriver.exe')
    crawler.get_104_jobs()